datadir <- '~/UROP'
source(file.path(datadir, 'R/unsupervised.R'))
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Attaching SeuratObject
source(file.path(datadir, 'R/analysis.R'))
## Loading required package: ggplot2
## Loading required package: lattice
We analyze our model’s performance on simulated spatial datasets of varying pixel resolutions This enables us to evaluate the model’s performance on predicting proportions in a more realistic setting and lets us demonstrate the model’s full mode. The simulated pucks were generated using the procedure outlined by STdeconvolve on MERFISH data from Moffit et al. 2018.
truth <- readRDS(file.path(datadir, 'Objects/vignette_v1/MERFISH_truth_20um2.rds'))
my_table = truth$annotDf
my_table$Cell_class <- factor(my_table$Cell_class)
n_levels = length(levels(my_table$Cell_class))
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
pres = unique(as.integer(my_table$Cell_class))
pres = pres[order(pres)]
if(n_levels > 21)
my_pal = pals::polychrome(n_levels)
plot <- ggplot2::ggplot(my_table, ggplot2::aes(x=Centroid_X, y=Centroid_Y)) + ggplot2::geom_point(ggplot2::aes(size = .15, shape=19,color=Cell_class)) +
ggplot2::scale_color_manual(values = my_pal[pres])+ ggplot2::scale_shape_identity() + ggplot2::theme_bw() + ggplot2::scale_size_identity()
plot
puck <- readRDS(file.path(datadir, '/Objects/vignette_v1/MERFISH_puck_20um2.rds'))
cell_type_info <- initialize.clusters(puck, resolution = 0.15)
gene_list <- puck@counts@Dimnames[[1]]
myRCTD <- create.object(puck, cell_type_info, gene_list_reg = gene_list)
myRCTD <- run.algorithm(myRCTD)
saveRDS(myRCTD, file.path(datadir, 'Objects/vignette_v1/MERFISH_20um2_final.rds'))
For the purposes of the analysis, ground truth pixels with a minority UMI proportion of at least 0.2 were considered doublets.
RCTD_list <- readRDS(file.path(datadir, 'Objects/vignette_v1/MERFISH_20um2_final.rds'))
RCTD_pred <- RCTD_list[[length(RCTD_list)]]
pred_results <- RCTD_pred@results$results_df
truth_results <- as.data.frame(truth$gtSpotTopics[rownames(pred_results),])
doublet_certain <- apply(truth_results, MARGIN = 1, function(x) length(which(x >= 0.2)) == 2)
singlet <- apply(truth_results, MARGIN = 1, function(x) max(x) > 0.8)
first_type <- apply(truth_results, MARGIN = 1, function(x) names(which(x == sort(x, decreasing = TRUE)[1]))[1])
second_type <- apply(truth_results, MARGIN = 1, function(x) names(which(x == sort(x, decreasing = TRUE)[2]))[1])
truth_results$spot_class <- 'reject'
truth_results[doublet_certain,]$spot_class <- 'doublet_certain'
truth_results[singlet,]$spot_class <- 'singlet'
truth_results$first_type <- first_type
truth_results$second_type <- second_type
truth_results <- truth_results[,c('spot_class', 'first_type', 'second_type')]
plot_all_cell_types <- function(results_df, coords, cell_type_names, resultsdir, size=0.15) {
barcodes = rownames(results_df[results_df$spot_class != "reject" & results_df$first_type %in% cell_type_names,])
my_table = coords[barcodes,]
my_table$class = results_df[barcodes,]$first_type
n_levels = length(levels(my_table$class))
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
pres = unique(as.integer(my_table$class))
pres = pres[order(pres)]
if(n_levels > 21)
my_pal = pals::polychrome(n_levels)
if(n_levels > 36)
stop("Plotting currently supports at most 36 cell types as colors")
plot <- ggplot2::ggplot(my_table, ggplot2::aes(x=x, y=y)) + ggplot2::geom_point(ggplot2::aes(size = size, shape=19,color=class)) +
ggplot2::scale_color_manual(values = my_pal[pres])+ ggplot2::scale_shape_identity() + ggplot2::theme_bw() + ggplot2::scale_size_identity()
pdf(file.path(resultsdir,"all_cell_types.pdf"))
invisible(print(plot))
dev.off()
return(plot)
}
plot_all_cell_types(RCTD_pred@results$results_df, RCTD_pred@originalSpatialRNA@coords, RCTD_pred@cell_type_info$renorm[[2]], '..', size=0.3)
We can begin by looking at singlet assignment, since at this resolution, most pixels will only contain one cell type.
singlet_table <- function(truth_results, pred_results) {
truth_singlet <- truth_results[truth_results$spot_class == 'singlet',]
pred_singlet <- pred_results[pred_results$spot_class == 'singlet',]
common_barcode <- intersect(row.names(truth_singlet), row.names(pred_singlet))
truth_singlet <- truth_singlet[common_barcode,]
pred_singlet <- pred_singlet[common_barcode,]
truth_types = unlist(list(truth_singlet[,'first_type']))
pred_types = unlist(list(pred_singlet[,'first_type']))
return(table(truth_types, pred_types))
}
mytable <- singlet_table(truth_results, pred_results)
mytable
## pred_types
## truth_types 0 1 2 3 4 5 6 7 8
## Astrocyte 0 268 0 0 0 0 2 0 1
## Endothelial 2 1 187 0 0 0 0 0 1
## Ependymal 0 0 0 0 0 0 123 0 0
## Excitatory 344 1 0 0 0 54 1 74 52
## Inhibitory 21 1 0 0 2 0 0 0 939
## Microglia 3 2 2 3 0 0 1 1 1
## OD Immature 0 1 0 0 96 0 1 0 0
## OD Mature 0 0 0 267 0 0 0 0 0
## Pericytes 0 1 22 0 0 0 0 0 0
We can visualize how the cell type assignments change by plotting the assignment of excitatory and inhibitory neurons as a function of their expression of excitatory and inhibitory neuronal marker genes.
First, we can determine the marker genes with the ground truth data.
get_marker_data <- function(cell_type_names, cell_type_means, gene_list) {
marker_means = cell_type_means[gene_list,]
marker_norm = marker_means / rowSums(marker_means)
marker_data = as.data.frame(cell_type_names[max.col(marker_means)])
marker_data$max_epr <- apply(cell_type_means[gene_list,],1,max)
colnames(marker_data) = c("cell_type",'max_epr')
rownames(marker_data) = gene_list
marker_data$log_fc <- 0
epsilon <- 1e-9
for(cell_type in unique(marker_data$cell_type)) {
cur_genes <- gene_list[marker_data$cell_type == cell_type]
other_mean = rowMeans(cell_type_means[cur_genes,cell_type_names != cell_type])
marker_data$log_fc[marker_data$cell_type == cell_type] <- log(epsilon + cell_type_means[cur_genes,cell_type]) - log(epsilon + other_mean)
}
return(marker_data)
}
puck <- readRDS(file.path(datadir, '/Objects/vignette_v1/MERFISH_puck_20um2.rds'))
truth <- readRDS(file.path(datadir, 'Objects/vignette_v1/MERFISH_truth_20um2.rds'))
cur_cell_types <- c('Astrocyte', 'Endothelial', 'Ependymal', 'Excitatory', 'Inhibitory', 'OD Immature', 'OD Mature')
cell_type_info_restr = list(t(truth$gtCtGenes))
cell_type_info_restr[[1]] = cell_type_info_restr[[1]][,cur_cell_types]
cell_type_info_restr[[2]] = cur_cell_types; cell_type_info_restr[[3]] = length(cur_cell_types)
de_genes <- get_de_genes(cell_type_info_restr, puck, fc_thresh = 1, expr_thresh = .0001, MIN_OBS = 3)
## get_de_genes: Astrocyte found DE genes: 5
## get_de_genes: Endothelial found DE genes: 13
## get_de_genes: Ependymal found DE genes: 10
## get_de_genes: Excitatory found DE genes: 25
## get_de_genes: Inhibitory found DE genes: 26
## get_de_genes: OD Immature found DE genes: 5
## get_de_genes: OD Mature found DE genes: 8
## get_de_genes: total DE genes: 79
marker_data_de = get_marker_data(cell_type_info_restr[[2]], cell_type_info_restr[[1]], de_genes)
Next, we can plot cell type assignments across iterations.
excitatory_genes <- rownames(marker_data_de)[marker_data_de$cell_type == "Excitatory"]
inhibitory_genes <- rownames(marker_data_de)[marker_data_de$cell_type == "Inhibitory"]
RCTD_list <- readRDS(file.path(datadir, 'Objects/vignette_v1/MERFISH_20um2_final.rds'))
generate_marker_plot <- function(i) {
results_df <- RCTD_list[[i]]@results$results_df
barcodes <- rownames(results_df[results_df$spot_class != 'reject' & results_df$first_type == '0', ])
plot_df <- data.frame(colSums(puck@counts[excitatory_genes, barcodes]) / puck@nUMI[barcodes], colSums(puck@counts[inhibitory_genes, barcodes]) / puck@nUMI[barcodes])
colnames(plot_df) = c('Excitatory','Inhibitory')
plot_df$type = "Excitatory"
plot_df_final <- plot_df[puck@nUMI[barcodes] >= 300,]
barcodes <- rownames(results_df[results_df$spot_class != 'reject' & results_df$first_type == '8', ])
plot_df <- data.frame(colSums(puck@counts[excitatory_genes, barcodes]) / puck@nUMI[barcodes], colSums(puck@counts[inhibitory_genes, barcodes]) / puck@nUMI[barcodes])
colnames(plot_df) = c('Excitatory','Inhibitory')
plot_df$type = "Inhibitory"
plot_df_final <- rbind(plot_df_final, plot_df[puck@nUMI[barcodes] >= 300,])
MULT <- 500
plot_df_final$Excitatory <- MULT * plot_df_final$Excitatory
plot_df_final$Inhibitory <- MULT * plot_df_final$Inhibitory
my_pal = pals::coolwarm(20)
p <- ggplot2::ggplot(plot_df_final) +
ggplot2::geom_point(ggplot2::aes(x=Excitatory,y=Inhibitory, color = type),alpha = 0.2,size=1) +
ggplot2::coord_fixed() + ggplot2::theme_classic() +
labs(color="Cluster Cell Type") + guides(color = guide_legend(override.aes = list(size = 3,alpha=1))) +
scale_color_manual(values=c(my_pal[1], my_pal[20]))+ theme(legend.position="top") + xlab('Excitatory Markers') + ylab('Inhibitory Markers') + scale_x_continuous(breaks = c(0,100,200), limits = c(0,300)) + scale_y_continuous(breaks = c(0,100,200), limits = c(0,300)) + ggtitle(sprintf('Iteration %s', i))
p
}
plots = list()
for (i in 1:length(RCTD_list)) {
plots[[i]] <- generate_marker_plot(i)
}
ggarrange(plots[[1]], plots[[3]], plots[[5]], plots[[7]], plots[[9]], plots[[11]], plots[[13]], plots[[15]], plots[[17]], nrow = 3, ncol = 3, common.legend = T)
To quantify the change in cell type assignment across iterations more succinctly, we can plot the mean difference between inhibitory and excitatory gene expression as a function of iteration number for both cell types. The error regions represent one standard deviation.
generate_marker_data <- function(i) {
results_df <- RCTD_list[[i]]@results$results_df
barcodes <- rownames(results_df[results_df$spot_class != 'reject' & results_df$first_type == '0', ])
plot_df <- data.frame(colSums(puck@counts[excitatory_genes, barcodes]) / puck@nUMI[barcodes], colSums(puck@counts[inhibitory_genes, barcodes]) / puck@nUMI[barcodes])
colnames(plot_df) = c('Excitatory','Inhibitory')
plot_df$type = "Excitatory"
plot_df_final <- plot_df[puck@nUMI[barcodes] >= 300,]
barcodes <- rownames(results_df[results_df$spot_class != 'reject' & results_df$first_type == '8', ])
plot_df <- data.frame(colSums(puck@counts[excitatory_genes, barcodes]) / puck@nUMI[barcodes], colSums(puck@counts[inhibitory_genes, barcodes]) / puck@nUMI[barcodes])
colnames(plot_df) = c('Excitatory','Inhibitory')
plot_df$type = "Inhibitory"
plot_df_final <- rbind(plot_df_final, plot_df[puck@nUMI[barcodes] >= 300,])
return(plot_df_final)
}
df <- data.frame(matrix(ncol = 3, nrow = length(RCTD_list)))
colnames(df) <- c('iteration', 'mean', 'sd')
df$iteration <- as.numeric(rownames(df))
for (i in 1:length(RCTD_list)) {
data <- generate_marker_data(i)
data$diff <- data$Inhibitory - data$Excitatory
df[i, 'mean'] <- mean(data[data$type == 'Excitatory', ]$diff)
df[i, 'sd'] <- sd(data[data$type == 'Excitatory', ]$diff)
}
df$type <- 'Excitatory'
df_final <- df
df <- data.frame(matrix(ncol = 3, nrow = length(RCTD_list)))
colnames(df) <- c('iteration', 'mean', 'sd')
df$iteration <- as.numeric(rownames(df))
for (i in 1:length(RCTD_list)) {
data <- generate_marker_data(i)
data$diff <- data$Inhibitory - data$Excitatory
df[i, 'mean'] <- mean(data[data$type == 'Inhibitory', ]$diff)
df[i, 'sd'] <- sd(data[data$type == 'Inhibitory', ]$diff)
}
df$type <- 'Inhibitory'
df_final <- rbind(df_final, df)
MULT <- 500
df_final$mean <- MULT * df_final$mean
df_final$sd <- MULT * df_final$sd
my_pal = pals::coolwarm(20)
ggplot(data=df_final, aes(x=iteration, y=mean, ymin=mean-sd, ymax=mean+sd, fill=type)) +
geom_line() +
geom_ribbon(alpha=0.5) +
labs(fill="Cell Type") +
xlab('Iteration') +
ylab('Mean Expression Difference (Inhibitory - Excitatory)') +
theme_minimal() +
scale_y_continuous(breaks = c(-200,-100,0,100,200), limits = c(-200,200)) +
scale_fill_manual(values=c(my_pal[1], my_pal[20]))
With the marker gene data, we can also plot the doublet mode generated proportion of excitatory and inhibitory on each pixel in marker gene space. We will only look at pixels where the top two candidate cell types are excitatory and inhibitory neurons.
results_df <- RCTD_list[[length(RCTD_list)]]@results$results_df
barcodes <- rownames(results_df[results_df$spot_class != 'reject' & results_df$first_type == '8' & results_df$second_type == '0', ])
plot_df <- data.frame(colSums(puck@counts[excitatory_genes, barcodes]) / puck@nUMI[barcodes], colSums(puck@counts[inhibitory_genes, barcodes]) / puck@nUMI[barcodes])
colnames(plot_df) = c('Excitatory','Inhibitory')
plot_df$proportion = RCTD_list[[i]]@results$weights_doublet[barcodes, ][, 'first_type']
plot_df_final <- plot_df[puck@nUMI[barcodes] >= 300,]
barcodes <- rownames(results_df[results_df$spot_class != 'reject' & results_df$first_type == '0' & results_df$second_type == '8', ])
plot_df <- data.frame(colSums(puck@counts[excitatory_genes, barcodes]) / puck@nUMI[barcodes], colSums(puck@counts[inhibitory_genes, barcodes]) / puck@nUMI[barcodes])
colnames(plot_df) = c('Excitatory','Inhibitory')
plot_df$proportion = RCTD_list[[i]]@results$weights_doublet[barcodes, ][, 'second_type']
plot_df_final <- rbind(plot_df_final, plot_df[puck@nUMI[barcodes] >= 300,])
MULT <- 500
plot_df_final$Excitatory <- MULT * plot_df_final$Excitatory
plot_df_final$Inhibitory <- MULT * plot_df_final$Inhibitory
ggplot2::ggplot(plot_df_final) + ggplot2::geom_point(ggplot2::aes(x=Excitatory,y=Inhibitory, color = proportion),alpha = 1,size=0.3) + ggplot2::coord_fixed() + ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + labs(color="Inhibitory Proportion") + ggplot2::scale_colour_gradientn(colors = pals::brewer.rdbu(20)[2:19]) + xlab('Excitatory Markers') + ylab('Inhibitory Markers') + scale_x_continuous(breaks = c(0,100,200), limits = c(0,300)) + scale_y_continuous(breaks = c(0,100,200), limits = c(0,300))
Finally, treating excitatory and inhibitory neurons as subtypes of one cell type, we can run the unsupervised model’s full mode on the pixels represented by one of the two neuron cell types so that each pixel gets some proportion of each cell type.
We first run the cell subtype model, which is the same as running full mode.
RCTD_list <- readRDS(file.path(datadir, 'Objects/vignette_v1/MERFISH_20um2_final.rds'))
RCTD_pred <- RCTD_list[[length(RCTD_list)]]
gene_list <- RCTD_pred@internal_vars$gene_list_reg
neuron_RCTD <- initialize.subtypes(RCTD_pred, c('0', '5', '7', '8'), resolution = 0.02, gene_list = gene_list)
neuron_RCTD <- run.subtypes(neuron_RCTD)
saveRDS(neuron_RCTD, file.path(datadir, 'Objects/vignette_v1/MERFISH_neurons.rds'))
We can then look at the proportion of the two neuron types in each pixel, plotted in marker gene space.
RCTD_list <- readRDS(file.path(datadir, 'Objects/vignette_v1/MERFISH_neurons.rds'))
results_df <- RCTD_list[[length(RCTD_list)]]@results$weights
barcodes <- rownames(results_df)
plot_df <- data.frame(colSums(puck@counts[excitatory_genes, barcodes]) / puck@nUMI[barcodes], colSums(puck@counts[inhibitory_genes, barcodes]) / puck@nUMI[barcodes])
colnames(plot_df) = c('Excitatory','Inhibitory')
plot_df$proportion = results_df[, 'subtype_0'] / (results_df[, 'subtype_0'] + results_df[, 'subtype_1'])
plot_df_final <- plot_df[puck@nUMI[barcodes] >= 300,]
MULT <- 500
plot_df_final$Excitatory <- MULT * plot_df_final$Excitatory
plot_df_final$Inhibitory <- MULT * plot_df_final$Inhibitory
ggplot2::ggplot(plot_df_final) + ggplot2::geom_point(ggplot2::aes(x=Excitatory,y=Inhibitory, color = proportion),alpha = 1,size=0.3) + ggplot2::coord_fixed() + ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + labs(color="Inhibitory Proportion") + ggplot2::scale_colour_gradientn(colors = pals::brewer.rdbu(20)) + xlab('Excitatory Markers') + ylab('Inhibitory Markers') + scale_x_continuous(breaks = c(0,100,200), limits = c(0,300)) + scale_y_continuous(breaks = c(0,100,200), limits = c(0,300))
puck <- readRDS(file.path(datadir, '/Objects/vignette_v1/MERFISH_puck_50um2.rds'))
gene_list <- puck@counts@Dimnames[[1]]
cell_type_info <- initialize.clusters(puck, resolution = 0.6)
myRCTD <- create.object(puck, cell_type_info, gene_list_reg = gene_list)
myRCTD <- run.algorithm(myRCTD, doublet_mode = 'full')
saveRDS(myRCTD, file.path(datadir, 'Objects/vignette_v1/MERFISH_50um2_final.rds'))
RCTD_list <- readRDS(file.path(datadir, 'Objects/vignette_v1/MERFISH_50um2_final.rds'))
RCTD_pred <- RCTD_list[[length(RCTD_list)]]
truth <- readRDS(file.path(datadir, 'Objects/vignette_v1/MERFISH_truth_50um2.rds'))
pred_results <- RCTD_pred@results$weights
#for (i in 1:dim(pred_results)[1]) {
# pred_results[i,] = pred_results[i,] / rowSums(pred_results)[i]
#}
truth_results <- truth$gtSpotTopics[rownames(pred_results),]
We make a heatmap of gene expression correlation between the ground truth and predicted gene expression profiles.
pred_gene <- RCTD_pred@cell_type_info$renorm[[1]]
truth_gene <- t(truth$gtCtGenes)
gene_correlation <- cor(pred_gene, truth_gene)
heatmap(gene_correlation)
We can assign predicted cell types to ground truth cell types by generating a heatmap of proportion correlation. The assignments are boxed.
true_types <- c('Pericytes', 'Microglia', 'Ependymal', 'OD Immature', 'Endothelial', 'OD Mature', 'Astrocyte', 'Excitatory', 'Inhibitory')
pred_types <- c('7', '3', '1', '6', '5', '4', '8', '2', '0')
correlation_matrix <- cor(truth_results, data.matrix(pred_results))
data <- melt(correlation_matrix)
data$diag = FALSE
data[data$Var1 == 'Ependymal' & data$Var2 == '7', ]$diag = TRUE
data[data$Var1 == 'OD Immature' & data$Var2 == '3', ]$diag = TRUE
data[data$Var1 == 'Endothelial' & data$Var2 == '1', ]$diag = TRUE
data[data$Var1 == 'OD Mature' & data$Var2 == '6', ]$diag = TRUE
data[data$Var1 == 'Astrocyte' & data$Var2 == '5', ]$diag = TRUE
data[data$Var1 == 'Excitatory' & data$Var2 %in% c('4','8'), ]$diag = TRUE
data[data$Var1 == 'Inhibitory' & data$Var2 %in% c('2','0'), ]$diag = TRUE
data$diag[!data$diag] <- NA
ggplot(data, aes(factor(Var1, levels=true_types), factor(Var2, levels=pred_types), fill= value)) +
geom_tile() +
theme_classic() +
scale_fill_gradientn(colors = pals::brewer.rdbu(20)[2:19], limits=c(-1,1), name='Correlation') +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab('True Cell Type')+ ylab('Predicted Cell Type') +
geom_tile(data = data[!is.na(data$diag), ], aes(color = diag), size = 0.7) +
scale_color_manual(guide = FALSE, values = c('TRUE' = "#D55E00"))
We can also plot the absolute error proportion for each pixel for each cell type. Plotting us lets us see model performance as a function of ground truth cell type.
ependymal_df <- as.data.frame(abs(pred_results[, '7'] - truth_results[, 'Ependymal']))
colnames(ependymal_df) <- 'value'
ependymal_df$type <- 'Ependymal'
od_immature_df <- as.data.frame(abs(pred_results[, '3'] - truth_results[, 'OD Immature']))
colnames(od_immature_df) <- 'value'
od_immature_df$type <- 'OD Immature'
endothelial_df <- as.data.frame(abs(pred_results[, '1'] - truth_results[, 'Endothelial']))
colnames(endothelial_df) <- 'value'
endothelial_df$type <- 'Endothelial'
od_mature_df <- as.data.frame(abs(pred_results[, '6'] - truth_results[, 'OD Mature']))
colnames(od_mature_df) <- 'value'
od_mature_df$type <- 'OD Mature'
astrocyte_df <- as.data.frame(abs(pred_results[, '5'] - truth_results[, 'Astrocyte']))
colnames(astrocyte_df) <- 'value'
astrocyte_df$type <- 'Astrocyte'
excitatory_df <- as.data.frame(abs(pred_results[, '4'] + pred_results[, '8'] - truth_results[, 'Excitatory']))
colnames(excitatory_df) <- 'value'
excitatory_df$type <- 'Excitatory'
inhibitory_df <- as.data.frame(abs(pred_results[, '2'] + pred_results[, '0'] - truth_results[, 'Inhibitory']))
colnames(inhibitory_df) <- 'value'
inhibitory_df$type <- 'Inhibitory'
abserr_df <- rbind(ependymal_df, od_immature_df, endothelial_df, od_mature_df, astrocyte_df, excitatory_df, inhibitory_df)
abserr_df$type <- as.factor(abserr_df$type)
ggplot(abserr_df, aes(x=type, y=value)) + geom_boxplot(width=0.1, outlier.shape = NA) + theme_minimal() + ylab('Absolute Error') + xlab('Cell Type') + ylim(0, 0.4)
We first run STdeconvolve on the dataset.
library(STdeconvolve)
puck <- readRDS(file.path(datadir, '/Objects/vignette_v1/MERFISH_puck_50um2.rds'))
cd <- puck@counts
ldas <- fitLDA(t(cd), Ks = 9)
optLDA <- optimalModel(models = ldas, opt = "min")
results <- getBetaTheta(optLDA, perc.filt = 0.05, betaScale = 1000)
deconProp <- results$theta
deconGexp <- results$beta
Then we can compare to the ground truth.
pred_gene <- t(deconGexp)
truth_gene <- t(truth$gtCtGenes)
gene_correlation <- cor(pred_gene, truth_gene)
heatmap(gene_correlation)
true_types <- c('Pericytes', 'Microglia', 'Ependymal', 'OD Immature', 'Endothelial', 'OD Mature', 'Astrocyte', 'Excitatory', 'Inhibitory')
pred_types <- c('3', '9', '6', '5', '1', '8', '7', '2', '4')
decon_results <- deconProp[rownames(truth_results), ]
correlation_matrix <- cor(truth_results, decon_results)
data <- melt(correlation_matrix)
data$diag = FALSE
data[data$Var1 == 'Pericytes' & data$Var2 == '3', ]$diag = TRUE
data[data$Var1 == 'OD Immature' & data$Var2 == '9', ]$diag = TRUE
data[data$Var1 == 'Endothelial' & data$Var2 == '6', ]$diag = TRUE
data[data$Var1 == 'OD Mature' & data$Var2 == '5', ]$diag = TRUE
data[data$Var1 == 'Astrocyte' & data$Var2 == '1', ]$diag = TRUE
data[data$Var1 == 'Excitatory' & data$Var2 %in% c('7','8'), ]$diag = TRUE
data[data$Var1 == 'Inhibitory' & data$Var2 %in% c('2','4'), ]$diag = TRUE
data$diag[!data$diag] <- NA
ggplot(data, aes(factor(Var1, levels = true_types), factor(Var2, levels = pred_types), fill= value)) +
geom_tile() +
theme_classic() +
scale_fill_gradientn(colors = pals::brewer.rdbu(20)[2:19], limits=c(-1,1), name='Correlation') +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab('True Cell Type')+ ylab('Predicted Cell Type') +
geom_tile(data = data[!is.na(data$diag), ], aes(color = diag), size = 0.7) +
scale_color_manual(guide = FALSE, values = c('TRUE' = "#D55E00"))
We can compare the pixel proportion RMSE between the two methods.
unsupervised_rmse <- as.data.frame(sqrt((
(pred_results[, '7'] - truth_results[, 'Ependymal']) ^ 2 +
(pred_results[, '3'] - truth_results[, 'OD Immature']) ^ 2 +
(pred_results[, '1'] - truth_results[, 'Endothelial']) ^ 2 +
(pred_results[, '6'] - truth_results[, 'OD Mature']) ^ 2 +
(pred_results[, '5'] - truth_results[, 'Astrocyte']) ^ 2 +
(pred_results[, '4'] + pred_results[, '8'] - truth_results[, 'Excitatory']) ^ 2 +
(pred_results[, '2'] + pred_results[, '0'] - truth_results[, 'Inhibitory']) ^ 2
) / 7))
colnames(unsupervised_rmse) <- 'value'
unsupervised_rmse$method <- 'Unsupervised'
decon_rmse <- as.data.frame(sqrt((
(decon_results[, '3'] - truth_results[, 'Pericytes']) ^ 2 +
(decon_results[, '9'] - truth_results[, 'OD Immature']) ^ 2 +
(decon_results[, '6'] - truth_results[, 'Endothelial']) ^ 2 +
(decon_results[, '5'] - truth_results[, 'OD Mature']) ^ 2 +
(decon_results[, '1'] - truth_results[, 'Astrocyte']) ^ 2 +
(decon_results[, '7'] + decon_results[, '8'] - truth_results[, 'Excitatory']) ^ 2 +
(decon_results[, '2'] + decon_results[, '4'] - truth_results[, 'Inhibitory']) ^ 2
) / 7))
colnames(decon_rmse) <- 'value'
decon_rmse$method <- 'STdeconvolve'
rmse <- rbind(unsupervised_rmse, decon_rmse)
ggplot(rmse, aes(x=method, y=value)) +
geom_violin(trim=T, alpha=0.1) + geom_boxplot(width=0.1) + theme_minimal() + ylab('RMSE') + xlab('Method')
We can also plot the comparison factored by cell type.
abserr_df <- abserr_df[abserr_df$type != 'Ependymal', ]
abserr_df$type <- as.factor(abserr_df$type)
abserr_df$method <- 'Unsupervised'
od_immature_df <- as.data.frame(abs(decon_results[, '9'] - truth_results[, 'OD Immature']))
colnames(od_immature_df) <- 'value'
od_immature_df$type <- 'OD Immature'
endothelial_df <- as.data.frame(abs(decon_results[, '6'] - truth_results[, 'Endothelial']))
colnames(endothelial_df) <- 'value'
endothelial_df$type <- 'Endothelial'
od_mature_df <- as.data.frame(abs(decon_results[, '5'] - truth_results[, 'OD Mature']))
colnames(od_mature_df) <- 'value'
od_mature_df$type <- 'OD Mature'
astrocyte_df <- as.data.frame(abs(decon_results[, '1'] - truth_results[, 'Astrocyte']))
colnames(astrocyte_df) <- 'value'
astrocyte_df$type <- 'Astrocyte'
excitatory_df <- as.data.frame(abs(decon_results[, '7'] + decon_results[, '8'] - truth_results[, 'Excitatory']))
colnames(excitatory_df) <- 'value'
excitatory_df$type <- 'Excitatory'
inhibitory_df <- as.data.frame(abs(decon_results[, '2'] + decon_results[, '4'] - truth_results[, 'Inhibitory']))
colnames(inhibitory_df) <- 'value'
inhibitory_df$type <- 'Inhibitory'
decon_df <- rbind(od_immature_df, endothelial_df, od_mature_df, astrocyte_df, excitatory_df, inhibitory_df)
decon_df$type <- as.factor(decon_df$type)
decon_df$method <- 'STdeconvolve'
mae <- rbind(abserr_df, decon_df)
ggplot(mae, aes(x=type, y=value, fill=factor(method))) + geom_boxplot(width=0.25, outlier.shape = NA) + theme_minimal() + ylab('Absolute Error') + xlab('Cell Type') + ylim(0, 0.4)
plot_puck_continuous(
RCTD_pred@originalSpatialRNA,
colnames(RCTD_pred@originalSpatialRNA@counts),
RCTD_pred@results$weights[,'6'],
size=3,
my_pal = pals::brewer.blues(20)[2:20]
)
plot_puck_continuous(
RCTD_pred@originalSpatialRNA,
colnames(RCTD_pred@originalSpatialRNA@counts),
RCTD_pred@results$weights[,'7'],
size=3,
my_pal = pals::brewer.blues(20)[2:20]
)
data <- as.data.frame(cbind(pred_results[, '6'], truth_results[, 'OD Mature']))
colnames(data) <- c('pred','truth')
my_pal = pals::coolwarm(20)
p1 <- ggplot(data, aes(x=truth, y=pred, colour="blue")) +
geom_point(color=my_pal[1]) +
theme_classic() +
xlab('True Mature OD Proportion') +
ylab('Predicted Mature OD Proportion') +
scale_y_continuous(breaks = c(0,0.5,1), limits = c(-.03,1.03)) +
scale_x_continuous(breaks = c(0,0.5,1), limits = c(-.03,1.03)) +
theme(legend.position = "none") +
geom_abline(slope=1, color = my_pal[20])
p1
# proportion RMSE
sqrt(sum((pred_results[, '6'] - truth_results[, 'OD Mature']) ** 2) / length(pred_results[,'6']))
## [1] 0.05866743
data <- as.data.frame(cbind(pred_results[, '7'], truth_results[, 'Ependymal']))
colnames(data) <- c('pred','truth')
my_pal = pals::coolwarm(20)
p2 <- ggplot(data, aes(x=truth, y=pred, colour="blue")) +
geom_point(color=my_pal[1]) +
theme_classic() +
xlab('True Ependymal Proportion') +
ylab('Predicted Ependymal Proportion') +
scale_y_continuous(breaks = c(0,0.5,1), limits = c(-.03,1.03)) +
scale_x_continuous(breaks = c(0,0.5,1), limits = c(-.03,1.03)) +
theme(legend.position = "none") +
geom_abline(slope=1, color = my_pal[20])
p2
# proportion RMSE
sqrt(sum((pred_results[, '7'] - truth_results[, 'Ependymal']) ** 2) / length(pred_results[,'7']))
## [1] 0.05266362